import json
from pprint import pprint

import requests

from mogujie.sqlalchemy_helper import save_goods


def get_page(page, action, timeout=10):
    """Fetch one page of search results for a category.

    Args:
        page: Page number placed in the ``page`` query parameter.
        action: Category key placed in the ``action`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives
            up on the call.

    Returns:
        The response body decoded as UTF-8 text when the status code is
        200; otherwise the integer HTTP status code.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` on connection failure or timeout.
    """
    url = f'https://list.mogu.com/search?&cKey=15&page={page}&action={action}'
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/79.0.3945.130 Safari/537.36"
        ),
    }
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the whole crawl.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        # response.content is the raw byte stream; decode it explicitly.
        return response.content.decode('utf-8')
    return response.status_code


def parse_page(html, action):
    """Parse one page of search results and save every product in it.

    Args:
        html: JSON text of a search response. ``get_page`` in this file
            returns an integer status code instead of text when the
            request does not succeed, so non-text values are tolerated.
        action: Category key; stored on each product under ``'category'``
            before it is passed to ``save_goods``.

    Returns:
        The ``isEnd`` flag found under ``result -> wall`` in the response.
        ``True`` is also returned when ``html`` is not text, so the caller
        stops paging this category instead of crashing.

    Raises:
        json.JSONDecodeError: If ``html`` is text but not valid JSON.
        KeyError: If the decoded document lacks the expected keys.
    """
    # json.loads only accepts str/bytes/bytearray; anything else (such as
    # an HTTP status code) would raise TypeError and abort the whole crawl.
    if not isinstance(html, (str, bytes, bytearray)):
        print('skip non-text response:', html)
        return True

    wall = json.loads(html)['result']['wall']
    is_end = wall['isEnd']  # flag marking whether this is the last page
    for item in wall['docs']:
        item['category'] = action
        pprint(item['title'])
        save_goods(item)

    return is_end


def get_pages(actions=None):
    """Crawl every page of each category until the end flag is reported.

    For each category, pages are requested starting from 1 and handed to
    ``parse_page``; paging stops as soon as ``parse_page`` returns a truthy
    value.

    Args:
        actions: Iterable of category keys to crawl. Defaults to
            ``('clothing', 'bags')``. Other keys that were listed for this
            crawler: 'skirt', 'shoes', 'neiyi', 'home', 'boyfriend'.
    """
    if actions is None:
        actions = ('clothing', 'bags')

    for action in actions:
        print('action:', action)
        print('-' * 20)
        page = 1
        while True:
            print('page:', page)
            html = get_page(page, action)
            if parse_page(html, action):
                break
            page += 1


def main():
    """Script entry point: run the crawl over all pages."""
    get_pages()


# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
